library(tidyverse)
## -- Attaching packages ----------------------------- tidyverse 1.2.1 --
## v ggplot2 3.0.0     v purrr   0.2.5
## v tibble  1.4.2     v dplyr   0.7.6
## v tidyr   0.8.1     v stringr 1.3.1
## v readr   1.1.1     v forcats 0.3.0
## -- Conflicts -------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(viridis)
## Loading required package: viridisLite
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(readr)
library(p8105.datasets)
data("instacart")

# Standardise the column names and keep the result. Without the assignment
# the cleaned tibble is only printed and then thrown away, so later code
# would still be working on the uncleaned data.
instacart <- instacart %>%
  janitor::clean_names()

# Show a preview of the cleaned data.
instacart
## # A tibble: 1,384,617 x 15
##    order_id product_id add_to_cart_ord~ reordered user_id eval_set
##       <int>      <int>            <int>     <int>   <int> <chr>   
##  1        1      49302                1         1  112108 train   
##  2        1      11109                2         1  112108 train   
##  3        1      10246                3         0  112108 train   
##  4        1      49683                4         0  112108 train   
##  5        1      43633                5         1  112108 train   
##  6        1      13176                6         0  112108 train   
##  7        1      47209                7         0  112108 train   
##  8        1      22035                8         1  112108 train   
##  9       36      39612                1         0   79431 train   
## 10       36      19660                2         1   79431 train   
## # ... with 1,384,607 more rows, and 9 more variables: order_number <int>,
## #   order_dow <int>, order_hour_of_day <int>,
## #   days_since_prior_order <int>, product_name <chr>, aisle_id <int>,
## #   department_id <int>, aisle <chr>, department <chr>
# Box plot of order hour of day, one box per aisle, restricted to aisles whose
# name contains "frozen" or "fresh " (note the trailing space in the pattern).
# Aisle levels are reordered by order hour using fct_reorder()'s default
# summary function.
instacart %>%
  filter(str_detect(string = aisle, pattern = "frozen|fresh ")) %>%
  mutate(aisle = fct_reorder(.f = aisle, .x = order_hour_of_day)) %>%
  plot_ly(
    type = "box",
    y = ~order_hour_of_day,
    color = ~aisle,
    colors = "Set2"
  )
# Box plot of order hour of day for the "fresh vegetables" aisle, one box per
# day of week. order_dow is converted to text and then to a factor whose
# levels are reordered by order hour.
instacart %>%
  filter(aisle == "fresh vegetables") %>%
  mutate(
    order_dow = fct_reorder(
      .f = as.character(order_dow),
      .x = order_hour_of_day
    )
  ) %>%
  plot_ly(
    type = "box",
    y = ~order_hour_of_day,
    color = ~order_dow,
    colors = "Set2"
  )
# The ten aisles with the most ordered items (ties at the cut-off are kept by
# top_n()). The weight column `n` is named explicitly so top_n() does not have
# to guess it from the column order and no "Selecting by n" message is shown.
top_aisles <- instacart %>%
  count(aisle, sort = TRUE) %>%
  top_n(10, n) %>%
  select(aisle)

# Box plot of order hour of day for those aisles, with aisle levels reordered
# by order hour. semi_join() keeps the rows of instacart whose aisle appears
# in top_aisles without adding any columns, which is all the join is for here.
semi_join(instacart, top_aisles, by = "aisle") %>%
  mutate(aisle = fct_reorder(aisle, order_hour_of_day)) %>%
  plot_ly(y = ~order_hour_of_day, color = ~aisle, type = "box", colors = "Set2")
# Bar chart of the number of items ordered from each of the ten most-ordered
# aisles, with bars sorted by count.
instacart %>%
  # count() replaces the manual group_by() + summarize(n = n()) + arrange().
  count(aisle, sort = TRUE) %>%
  # Name the ranking column explicitly instead of relying on the last column.
  top_n(10, n) %>%
  mutate(aisle = fct_reorder(aisle, n)) %>%
  plot_ly(x = ~aisle, y = ~n, color = ~aisle, type = "bar")
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
# Line chart of the mean order hour by day of week, one line per aisle whose
# name contains "fresh ".
instacart %>%
  filter(str_detect(aisle, "fresh ")) %>%
  group_by(aisle, order_dow) %>%
  # summarize() (not mutate()) collapses the data to one row per aisle/day, so
  # each line has a single point per day instead of one per ordered item.
  summarize(mean_hour = mean(order_hour_of_day)) %>%
  # Drop the remaining grouping so plot_ly() splits traces by `color` only.
  ungroup() %>%
  plot_ly(
    x = ~order_dow,
    y = ~mean_hour,
    color = ~aisle,
    type = "scatter",
    mode = "lines" # "line" is not a valid scatter mode
  )